pacman::p_load(tidyverse,data.table,broom,readxl,latex2exp,ggpubr,ggpmisc)
`%!in%` = Negate(`%in%`)
################################################################################

#Spatial units
spatial_id_a = 'county_id'

#Intermediaries
share_type = c('share_ij_tonnes') 
exporter_type = c('exporter_group')
destination_type = 'destination_bloc'
year_max=2018
id_type_var = c('county_known')

for(spatial_id_b in c('amc_id', 'microregion_id', 'mesoregion_id') ){
  
  df = fread(paste0('../../data/landuse/clean/geographicunits_argentina.csv.gz'))[, list(country, region, state_id, county_id)]
  setnames(df, old=spatial_id_a, new=c('spatial_id'))
  df = df[, list(spatial_id, region, state_id)]
  df_a_u = unique(df, by = "spatial_id")
    
  df = fread(paste0('../../data/landuse/clean/geographicunits_brazil.csv.gz'))[, list(country, region, state_id, mesoregion_id, microregion_id, amc_id, county_id)]
  setnames(df, old=spatial_id_b, new=c('spatial_id'))
  df = df[, list(spatial_id, region, state_id)]
  df_b_u = unique(df, by = "spatial_id")
  
  df_cw = rbind(df_a_u,df_b_u)
    
  
  ##################### Land shares
  
  for (country in c('argentina','brazil') ){
    
    df_l = fread(paste0('../../data/landuse/clean/landuse_',country,'_county.csv.gz'))
    df_u = fread(paste0('../../data/landuse/clean/geographicunits_',country,'.csv.gz'))
  
    if (country=='argentina'){
      
      #Spatial unit crosswalk
      df_u = df_u[, list(county_id, state_id, region, land_area_km2)]
      
      #Land
      df = merge(df_l, df_u, by='county_id')
      df = df[, c('forest','pasture') := list(forest_natural+forest_planted, pasture+forage_seasonal+forage_permanent)]
      setnames(df, old = c(spatial_id_a),  new = c('spatial_id'))
      df = df[,  list(year, region, state_id, spatial_id, pasture, forest, soybean, maize)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, region, state_id, spatial_id)][, unit:='hectares']
      setnames(df, old=c('soybean','maize','pasture'), new=c('soybean_ha','maize_ha','pasture_ha'))
      df[, c('agriculture_ha','nature_ha'):=list(soybean_ha+maize_ha+pasture_ha,forest)]
      df[, c('soybean','maize','pasture','share_agriculture'):=list(soybean_ha/agriculture_ha, maize_ha/agriculture_ha, pasture_ha/agriculture_ha, agriculture_ha/(agriculture_ha+nature_ha) )]
      df = df[,list(year, country, region, state_id, spatial_id, soybean, maize, pasture, share_agriculture)][is.finite(share_agriculture),]
      df_commodity = melt(df[,list(year, spatial_id, soybean, maize, pasture)], id.vars=c("year","spatial_id"), variable.name="commodity", value.name="share_commodity")
      df_nests = df[,list(year, country, region, state_id, spatial_id, share_agriculture)]
      df_land = merge(df_nests,df_commodity, by=c('year','spatial_id'))
      
      df_ar = df_land
      df_ar$country = 'ARGENTINA'
      
    }else{
      
      #Spatial unit crosswalk
      df_u = df_u[, list(county_id, amc_id, microregion_id, mesoregion_id, state_id, region, land_area_km2)]
      
      #Land
      df = merge(df_l, df_u, by='county_id')
      df = df[, c('pasture','forest') := list(pasture_natural+pasture_planted,forest_natural+forest_planted)]
      setnames(df, old = c(spatial_id_b),  new = c('spatial_id'))
      df = df[,  list(year, region, state_id, spatial_id, pasture, forest,soybean, maize)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, region, state_id, spatial_id)][, unit:='hectares']
      setnames(df, old=c('soybean','maize','pasture'), new=c('soybean_ha','maize_ha','pasture_ha'))
      df[, c('agriculture_ha','nature_ha'):=list(soybean_ha+maize_ha+pasture_ha,forest)]
      df[, c('soybean','maize','pasture','share_agriculture'):=list(soybean_ha/agriculture_ha, maize_ha/agriculture_ha, pasture_ha/agriculture_ha, agriculture_ha/(agriculture_ha+nature_ha) )]
      df = df[,list(year, country, region, state_id, spatial_id, soybean, maize, pasture, share_agriculture)][is.finite(share_agriculture),]
      df_commodity = melt(df[,list(year, spatial_id, soybean, maize, pasture)], id.vars=c("year","spatial_id"), variable.name="commodity", value.name="share_commodity")
      df_nests = df[,list(year, country, region, state_id, spatial_id, share_agriculture)]
      df_land = merge(df_nests,df_commodity, by=c('year','spatial_id'))
  
      df_br = df_land
      df_br$country = 'BRAZIL'
      
    }
  }
  
  df_land = rbind(df_ar,df_br)
  
  
  ##################### Output elasticities
  
  if(spatial_id_b!='amc_id'){
    
    df = fread('inputs/parameters_supply.csv')[,list(spatial_id, theta_lambda, lambda, theta, frontier)]
    df_u = fread(paste0('../../data/landuse/clean/geographicunits_brazil.csv.gz'))[, list(region, state_id, mesoregion_id, microregion_id, amc_id, county_id)]
    setnames(df_u, old='amc_id', new='spatial_id')
    df = merge(df, df_u, by='spatial_id', all.y=T)[,-c('spatial_id')]
    setnames(df, old=spatial_id_b, new='spatial_id')
    dfb=df[,list(spatial_id, theta_lambda, lambda, theta, frontier)]
    dfa = fread('inputs/parameters_supply.csv')[,list(spatial_id, theta_lambda, lambda, theta, frontier)][like(spatial_id,"AR")]
    df = rbind(dfa,dfb)
    df_params = df[, lapply(.SD, mean, na.rm=TRUE), by=list(spatial_id)]
    
  }else{
    
    df = fread('inputs/parameters_supply.csv')[,list(spatial_id, theta_lambda, lambda, theta, frontier)]
    df_params = df[, lapply(.SD, mean, na.rm=TRUE), by=list(spatial_id)]
    
  }
  
  df = merge(df_land,df_params,  by=c('spatial_id'))
  df_factors = setDT(read_excel('../../data/factors/raw/factor_shares.xlsx'))[,c('commodity','land_share')]
  df = merge(df,df_factors, by=c('commodity'))

  df[, c('elasticity_QP_i'):=list(  ( (theta_lambda-1)*(1-share_commodity) + (theta-1)*share_commodity*(1-share_agriculture)+(1-land_share) )/land_share ) ]
  df[, c('elasticity_PQ_i'):=list( 1/elasticity_QP_i ) ]
  df[commodity=='pasture',]$commodity = 'beef'
  df_elasticity = df
  
  
  ##################### Market shares
  
  for (country in c('argentina','brazil') ){
    
    if (country=='argentina'){
      
      df = fread(paste0('../../data/agribusiness/clean/soybean_',country,'.csv.gz'))[id_type %in% id_type_var & state %!in% c('UNKNOWN','IMPORTS + STOCK') & destination_bloc != 'UNKNOWN',]
      setnames(df, old = c(spatial_id_a,exporter_type,destination_type),  new = c('spatial_id','firm','destination_unit'))
      
      df_n_ij = df[, list(year, spatial_id, destination_unit, commodity, firm)][ , .(N_ij = length(unique(firm))), by=list(year, spatial_id, destination_unit, commodity)]
      df_n_i = df[, list(year, spatial_id, commodity, firm)][ , .(N_i = length(unique(firm))), by=list(year, spatial_id, commodity)]
      df_n = merge(df_n_ij,df_n_i, by=c('year','spatial_id','commodity'), all.x=T)
      
      df_q_ij = df[, list(year, country, region, state_id, spatial_id, destination_unit, equivalent_tonnes, fob_usd, commodity)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, country, region, state_id, spatial_id, destination_unit, commodity)]
      df_q_i = df[, list(year, country, region, state_id, spatial_id, equivalent_tonnes, fob_usd, commodity)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, country, region, state_id, spatial_id, commodity)]
      df_s = merge(df_q_ij,df_q_i, by=c('year','country','region','state_id','spatial_id','commodity'))[, c('share_ij_tonnes','share_ij_usd','price_ij','q_ij','q_i') := list(equivalent_tonnes.x/equivalent_tonnes.y,fob_usd.x/fob_usd.y,fob_usd.x/equivalent_tonnes.x,equivalent_tonnes.x,equivalent_tonnes.y)][, list(year, country, region, state_id, spatial_id, destination_unit, share_ij_tonnes, q_ij, q_i, commodity)]
      
      df_a_t = merge(df_s,df_n, by=c('year','spatial_id','destination_unit','commodity'))
      df_a = df_a_t[year<=year_max,][, c("year") := NULL][, lapply(.SD, mean, na.rm=TRUE),  by=list(country, region, state_id, spatial_id, destination_unit, commodity)]
      
    }else{
      
      n_c=1
      for (commodity in c('soybean','maize','beef')) {
        
        if(commodity=='beef'){
          df = fread(paste0('../../data/agribusiness/clean/',commodity,'_',country,'.csv.gz'))[product_type %in% c('Fresh & frozen beef products','BEEF BONELESS') & id_type %in% id_type_var & state !='UNKNOWN STATE'  & destination != 'BRAZIL' & destination_bloc != 'UNKNOWN',]
          df$port = df$hub
        }else{
          df = fread(paste0('../../data/agribusiness/clean/',commodity,'_',country,'.csv.gz'))[id_type %in% id_type_var & state !='UNKNOWN STATE'  & destination != 'BRAZIL' & destination_bloc != 'UNKNOWN',]
        }
        setnames(df, old = c(spatial_id_b,exporter_type,destination_type),  new = c('spatial_id','firm','destination_unit'))
        
        df_n_ij = df[, list(year, spatial_id, destination_unit, commodity, firm)][ , .(N_ij = length(unique(firm))), by=list(year, spatial_id, destination_unit, commodity)]
        df_n_i = df[, list(year, spatial_id, commodity, firm)][ , .(N_i = length(unique(firm))), by=list(year, spatial_id, commodity)]
        df_n = merge(df_n_ij,df_n_i, by=c('year','spatial_id','commodity'), all.x=T)
        
        df_q_ij = df[, list(year, country, region, state_id, spatial_id, destination_unit, equivalent_tonnes, fob_usd, commodity)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, country, region, state_id, spatial_id, destination_unit, commodity)]
        df_q_i = df[, list(year, country, region, state_id, spatial_id,equivalent_tonnes,fob_usd, commodity)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, country, region, state_id, spatial_id, commodity)]
        df_s = merge(df_q_ij,df_q_i, by=c('year','country','region','state_id','spatial_id','commodity'))[, c('share_ij_tonnes','share_ij_usd','price_ij','q_ij','q_i') := list(equivalent_tonnes.x/equivalent_tonnes.y,fob_usd.x/fob_usd.y,fob_usd.x/equivalent_tonnes.x,equivalent_tonnes.x,equivalent_tonnes.y)][, list(year, country, region, state_id, spatial_id, destination_unit, share_ij_tonnes, q_ij, q_i, commodity)]
        
        df_b_t_c = merge(df_s,df_n, by=c('year','spatial_id','destination_unit','commodity'))
        df_b_c = df_b_t_c[year<=year_max,][, c("year") := NULL][, lapply(.SD, mean, na.rm=TRUE),  by=list(country, region, state_id, spatial_id, destination_unit, commodity)]
        
        if(n_c==1){
          df_b_t = df_b_t_c
          df_b = df_b_c
        }else{
          df_b_t = rbind(df_b_t,df_b_t_c)
          df_b = rbind(df_b,df_b_c)
        }
        n_c=n_c+1
      } 
    }
  }
  
  df = df_a_t[year==2018,]
  df = merge(df, df_a_u, by=c('spatial_id','region','state_id'), all.y=T)
  df$year=2018
  df[is.na(N_i),]$N_i= quantile(df$N_i,0.03,na.rm=T)
  df[is.na(N_ij),]$N_ij = quantile(df$N_ij,0.03,na.rm=T)
    
  df$commodity='soybean'
  df$country='ARGENTINA'
  df1 = df
  df2 = df
  df1$commodity='maize'
  df2$commodity='beef'
  df_a_t = rbind(df,df1,df2)
  
  df_shares = rbind(df_a_t,df_b_t)
    
    
  ##################### Markdowns
  
  df = merge(df_shares, df_elasticity, by=c('commodity','year','country','region','state_id','spatial_id'))
  setnames(df, old = c(share_type),  new = c('share_ij'))
  df$mu_i = (1+df$elasticity_PQ_i/df$N_i)^(-1)
  df[N_i==0,]$mu_i=NA
  df$mu_ij = (1+df$elasticity_PQ_i*df$share_ij/df$N_ij)^(-1)
  df[N_ij==0,]$mu_ij=NA
  df$N_i = as.numeric(df$N_i)
  df$N_ij = as.numeric(df$N_ij)
  df_final = df
  
  
  df1 = df_final[year==2018 & country=='ARGENTINA',]
  df2 = df_final[year==2017 & country=='BRAZIL',]
  df=rbind(df1,df2)
  df  = df[, list(year, commodity, country, region, spatial_id, N_i, q_i, share_commodity, share_agriculture, elasticity_QP_i, mu_i)][, lapply(.SD, mean, na.rm=TRUE), by=list(year, commodity, country, region, spatial_id)]
  df_i = melt(df, id.vars=c("year","commodity","country","region","spatial_id"), variable.name="value_type", value.name="value")[!is.na(value),]
        
  ##### Correlation with emissions intensity
  
  #Crosswalk to aggregate spatial units
  df_a_county = fread(paste0('../../data/landuse/clean/geographicunits_argentina.csv.gz'))[, list(county_id,state_id)]
  df_b_county = fread(paste0('../../data/landuse/clean/geographicunits_brazil.csv.gz'))[, list(state_id, mesoregion_id, microregion_id, amc_id, county_id)]
  
  #Emissions intensity measures
  ctype_list = c('aboveground','belowground')
  cmeasure = 'mean'
  df_em = fread("../../data/environmental/clean/carbonstocks_argentinabrazil.csv.gz")[type %in% ctype_list,]
  setnames(df_em, old=c(cmeasure), new=c('tC_per_ha'))
  df_em = df_em[, list(county_id, tC_per_ha)][, lapply(.SD, median, na.rm=TRUE), by=list(county_id)]
  
  df_a_county=merge(df_a_county,df_em[,list(county_id,tC_per_ha)],by='county_id')
  df_b_county=merge(df_b_county,df_em[,list(county_id,tC_per_ha)],by='county_id')
  setnames(df_a_county, old=spatial_id_a, new='spatial_id')
  setnames(df_b_county, old=spatial_id_b, new='spatial_id')
  df_em=rbind(df_a_county[,list(spatial_id,tC_per_ha)],df_b_county[,list(spatial_id,tC_per_ha)])[, lapply(.SD, mean), by=.(spatial_id)]
  
  #Scatterplot
  df = df_i[value_type=='mu_i',][,list(commodity,spatial_id,value)][, lapply(.SD, mean), by=.(commodity,spatial_id)]
  df = merge(df, df_em, by=c('spatial_id'))
  df$y_var=log(df$value)
  df$x_var=log(df$tC_per_ha)
  
  df_size = df_i[value_type=='q_i',][,list(commodity,spatial_id,value)][, lapply(.SD, mean), by=.(commodity,spatial_id)][,size_var:=value]
  df = merge(df, df_size[,list(commodity,spatial_id,size_var)], by=c('commodity','spatial_id'))
  
  for(commodity_var in c('beef','maize','soybean')){
    
        df_scatter = df[commodity==commodity_var,]
        x_max = quantile(df_scatter$x_var, 0.975, na.rm=T)
        x_min = quantile(df_scatter$x_var, 0.025, na.rm=T)
        y_max = quantile(df_scatter$y_var, 0.975, na.rm=T)
        y_min = quantile(df_scatter$y_var, 0.025, na.rm=T)
        
        if(spatial_id_b=='amc_id'){df_scatter = df_scatter[y_var>y_min,][x_var>x_min,]}
        
        fit_formula <- y ~ x
        fig=ggplot(data = df_scatter, aes(x = x_var, y=y_var) ) +
          stat_smooth(method = "lm", formula = y ~ x + I(x^2), color='black', linewidth = 1, se=F)+
          theme_minimal() +
          labs(x='log(tC/ha)', y=TeX('log(markdown \\mu)'), title = paste0('Upstream ',commodity_var, ' markets') )+
          theme(axis.title.x = element_text(hjust = 1, vjust=-1, size=9),
                axis.title.y = element_text(hjust = 0.95,  vjust=2, size=9),
                axis.text.x = element_text(size=8),
                axis.text.y = element_text(size=8),
                plot.title = element_text(size=10, hjust=0.5),
                aspect.ratio = 1,
                panel.grid.minor = element_blank())+
          stat_summary_bin(fun='mean', bins=20, color='darkred', size=2, geom='point')+
          guides(size = "none")
      
        fig_formatted=ggplot(data = df_scatter, aes(x = x_var, y=y_var) ) +
          stat_smooth(method = "lm", formula = y ~ x + I(x^2), color='black', linewidth = 1, se=F)+
          theme_minimal() +
          labs(x='log(tC/ha)', y=TeX('log(markdown \\mu)'), title='C. Corr. w/emissions intensity')+
          theme(axis.title.x = element_text(hjust = 1, vjust=-1, size=8),
                axis.title.y = element_text(hjust = 0.9, vjust=0, size=8),
                axis.text.x = element_text(size=7),
                axis.text.y = element_text(size=7),
                plot.title = element_text(size=9, hjust=1),
                aspect.ratio = 1.25,
                panel.grid.minor = element_blank())+
          stat_summary_bin(fun='mean', bins=20, color='black', size=1, geom='point')+
          guides(size = "none")
        
        if(commodity_var=='beef'){fig_b=fig}
        if(commodity_var=='maize'){fig_m=fig}
        if(commodity_var=='soybean'){fig_s=fig}
        
        if(commodity_var=='beef' & spatial_id_b=='amc_id'){figure3c=fig_formatted}
        
  }
  
  
  if(spatial_id_b=='amc_id'){
    df_amc = df_i
    fig_full = ggarrange(fig_b,fig_s,fig_m,nrow=1,ncol=3)
    ggsave(paste0("../../output/figures/appendix/figure4b_corre.pdf"), width = 9, height = 3)
    ggsave(paste0("../../output/figures/appendix/figure4b_corre.eps"), width = 9, height = 3)
    
    fig = ggarrange(fig_b,fig_s,fig_m,nrow=1,ncol=3)
    fig_amc = annotate_figure(fig, top = text_grob("Upstream markets defined at the AMC-level", size=16))}
  
  if(spatial_id_b=='microregion_id'){
    df_micro = df_i
    fig_micro = annotate_figure(ggarrange(fig_b,fig_m,fig_s,nrow=1,ncol=3), top = text_grob("Upstream markets defined at the microregion-level", size=16))}
  
  if(spatial_id_b=='mesoregion_id'){
    df_meso = df_i
    fig_meso = annotate_figure(ggarrange(fig_b,fig_m,fig_s,nrow=1,ncol=3), top = text_grob("Upstream markets defined at the mesoregion-level", size=16))}
    
}

fig_all = ggarrange(fig_amc,fig_micro,fig_meso,nrow=3,ncol=1)
ggsave(paste0("../../output/figures/appendix/figure3_mktdef.pdf"), width = 12, height = 14)
ggsave(paste0("../../output/figures/appendix/figure3_mktdef.eps"), width = 12, height = 14)


######## Comparison of markdowns across spatial units

for(scatter_variable in c('N_i','mu_i')){
  
  if(scatter_variable=='N_i'){scatter_variable_label='Number of firms'}
  if(scatter_variable=='mu_i'){scatter_variable_label=TeX('Markdown \\mu')}
    
  ### AMC to microregion
  df_b = fread(paste0('../../data/landuse/clean/geographicunits_brazil.csv.gz'))[, list(mesoregion_id, microregion_id, amc_id)]
  df_b = unique(df_b, by = "amc_id")
  dfx = merge(df_amc, df_b[,list(amc_id,microregion_id)], by.x='spatial_id', by.y='amc_id')[value_type==scatter_variable, list(microregion_id,commodity,value)][, lapply(.SD, mean), by=.(microregion_id,commodity)]
  dfy = df_micro[value_type==scatter_variable & country=='BRAZIL', list(spatial_id,commodity,value)]
  setnames(dfy, old='spatial_id', new='microregion_id')
  df = merge(dfx,dfy,by=c('microregion_id','commodity'))
  fit_formula <- y ~ x
  fig_amc_micro = ggplot(data = df, aes(x = value.x, y=value.y) ) +
    geom_point(alpha = 0.5) +
    stat_poly_line(formula = fit_formula, se=F, color='darkred', linewidth=1)+
    stat_poly_eq(formula = fit_formula, aes(label = paste(..eq.label..)), size=4.25, color='darkred', label.x = 0.95, label.y=0.3)+
    stat_poly_eq(formula = fit_formula, aes(label = paste(..rr.label..)), size=4.25, color='darkred', label.x = 0.95, label.y=0.2)+
    geom_abline(aes(slope=1, intercept=0), linewidth=0.75, linetype='dashed', color='darkgrey') +
    theme_minimal() +
    labs(x='AMC market definition', y='microregion market definition', title=scatter_variable_label)+
    theme(axis.title.x = element_text(hjust = 1, size=12),
          axis.title.y = element_text(hjust=1, size=12),
          plot.title = element_text(hjust=0.5),
          aspect.ratio = 1,
          panel.grid.minor = element_blank(),
          legend.position = 'none',
          legend.title = element_blank())+
    guides(size = "none")
  
  ### AMC to mesoregion
  df_b = fread(paste0('../../data/landuse/clean/geographicunits_brazil.csv.gz'))[, list(mesoregion_id, microregion_id, amc_id)]
  df_b = unique(df_b, by = "amc_id")
  dfx = merge(df_amc, df_b[,list(amc_id,mesoregion_id)], by.x='spatial_id', by.y='amc_id')[value_type==scatter_variable, list(mesoregion_id,commodity,value)][, lapply(.SD, mean), by=.(mesoregion_id,commodity)]
  dfy = df_meso[value_type==scatter_variable & country=='BRAZIL', list(spatial_id,commodity,value)]
  setnames(dfy, old='spatial_id', new='mesoregion_id')
  df = merge(dfx,dfy,by=c('mesoregion_id','commodity'))
  fit_formula <- y ~ x
  fig_amc_meso = ggplot(data = df, aes(x = value.x, y=value.y) ) +
    geom_point(alpha = 0.5) +
    stat_poly_line(formula = fit_formula, se=F, color='darkred', linewidth=1)+
    stat_poly_eq(formula = fit_formula, aes(label = paste(..eq.label..)), size=4.25, color='darkred', label.x = 0.95, label.y=0.3)+
    stat_poly_eq(formula = fit_formula, aes(label = paste(..rr.label..)), size=4.25, color='darkred', label.x = 0.95, label.y=0.2)+
    geom_abline(aes(slope=1, intercept=0), linewidth=0.75, linetype='dashed', color='darkgrey') +
    theme_minimal() +
    labs(x='AMC market definition', y='mesoregion market definition', title=scatter_variable_label)+
    theme(axis.title.x = element_text(hjust = 1, size=12),
          axis.title.y = element_text(hjust=1, size=12),
          plot.title = element_text(hjust=0.5),
          aspect.ratio = 1,
          panel.grid.minor = element_blank(),
          legend.position = 'none',
          legend.title = element_blank())+
    guides(size = "none")
  
  ### Microregion to mesoregion
  df_b = fread(paste0('../../data/landuse/clean/geographicunits_brazil.csv.gz'))[, list(mesoregion_id, microregion_id)]
  df_b = unique(df_b, by = "microregion_id")
  dfx = merge(df_micro, df_b[,list(microregion_id,mesoregion_id)], by.x='spatial_id', by.y='microregion_id')[value_type==scatter_variable, list(mesoregion_id,commodity,value)][, lapply(.SD, mean), by=.(mesoregion_id,commodity)]
  dfy = df_meso[value_type==scatter_variable & country=='BRAZIL', list(spatial_id,commodity,value)]
  setnames(dfy, old='spatial_id', new='mesoregion_id')
  df = merge(dfx,dfy,by=c('mesoregion_id','commodity'))
  fit_formula <- y ~ x
  fig_micro_meso = ggplot(data = df, aes(x = value.x, y=value.y) ) +
    geom_point(alpha = 0.5) +
    stat_poly_line(formula = fit_formula, se=F, color='darkred', linewidth=1)+
    stat_poly_eq(formula = fit_formula, aes(label = paste(..eq.label..)), size=4.25, color='darkred', label.x = 0.95, label.y=0.3)+
    stat_poly_eq(formula = fit_formula, aes(label = paste(..rr.label..)), size=4.25, color='darkred', label.x = 0.95, label.y=0.2)+
    geom_abline(aes(slope=1, intercept=0), linewidth=0.75, linetype='dashed', color='darkgrey') +
    theme_minimal() +
    labs(x='microregion market definition', y='mesoregion market definition', title=scatter_variable_label)+
    theme(axis.title.x = element_text(hjust = 1, size=12),
          axis.title.y = element_text(hjust=1, size=12),
          plot.title = element_text(hjust=0.5),
          aspect.ratio = 1,
          panel.grid.minor = element_blank(),
          legend.position = 'none',
          legend.title = element_blank())+
    guides(size = "none")
  
  fig_all_spatialunits = ggarrange(fig_amc_micro,fig_amc_meso,nrow=1,ncol=2)
  if(scatter_variable=='N_i'){
    ggsave(paste0("../../output/figures/appendix/figure2b_mktdef.pdf"), width = 9, height = 5)}
  if(scatter_variable=='mu_i'){
    ggsave(paste0("../../output/figures/appendix/figure2a_mktdef.pdf"), width = 9, height = 5)}

}

#Combine all figures from main text
figure3 = ggarrange(figure3a,figure3b,figure3c,nrow=1,ncol=3)
ggsave(paste0("../../output/figures/figure3.pdf"), width = 5.5, height = 2.2, units='in')
ggsave(paste0("../../output/figures/figure3.eps"), width = 5.5, height = 2.2, units='in')

figure3_bw = ggarrange(figure3a_bw,figure3b,figure3c,nrow=1,ncol=3)
ggsave(paste0("../../output/figures/figure3_bw.pdf"), width = 5.5, height = 2.2, units='in')
ggsave(paste0("../../output/figures/figure3_bw.eps"), width = 5.5, height = 2.2, units='in')

